# 04_part_i_condition.R
# Produces results from Part I: Which Categories Condition Eligibility 
#                               for Greater Priority?
# Produces:
# - Main Figure 5 - Map of Co-occurrences 
# - Main Figure 6 - Most Contingent Categories
# - Figure S2: Degree centrality in co-occurrence network with
#                    alpha = 0.5
# - Table S7: Degree centrality in co-occurrence network 
#                   for different alpha values.
# - Table S8: Degree centrality in co-occurrence network 
#                   under different weighting. 

library(tidyverse)
library(igraph)
library(GGally) 
library(stringr)
library(ggthemes)
library(shadowtext)
library(tnet)
library(gridExtra)
library(grid)
library(xtable)
library(here)

# Load data on PHAs in sample
pha_pref_ref <- readRDS(here("data", "pha_pref_ref.RDS"))
# Load data on PHA characteristics
pha_all <-  readRDS(here("data", "pha_all.RDS")) %>% 
  # Create variable for percent of all vouchers administered
  # by given PHA
  mutate(hud_perc_vouchers = hud_med_month_vouchers / sum(hud_med_month_vouchers) * 100)
# Load coded preference data
codes <- readRDS(here("data", "codes_expanded.RDS"))

################################################
# Build a co-occurence network
# with undirected ties between categories that 
# are paired within preferences 
################################################

# Create data frame and matrix of preferences
pref <- codes %>%
  ungroup() %>%
  # Only select rows for housing authorities with preferences
  filter(!is.na(unit_single)) %>%
  dplyr::select(unit_single:special_defs)  

# Read in table with preference descriptions 
pref_descrip <- read_csv(here("data", "pref_summary_final_122022.csv")) %>%
  mutate(category = str_replace_all(category, " ", "_"))

# Create reference to map different types of category to 
# different categories
num_cats <- length(table(pref_descrip$type_new_long))
colors_bars <- colorblind_pal()(num_cats)
colors_ref <- data.frame(type_new_long = names(table(pref_descrip$type_new_long)),
  node_color = colorblind_pal()(num_cats))

colors_ref <- colorblind_pal()(num_cats)
names(colors_ref) <- names(table(pref_descrip$type_new_long))

# Align order for nicer looking column labels
pretty_colnames <- data.frame(category = colnames(pref)) %>%
  left_join(pref_descrip, by = "category") %>%
  select(label, type_new_long)

pref_m <- as.matrix(pref)
colnames(pref_m) <- pretty_colnames$label

# Calculate co-occurence 
cooccur <- t(pref_m) %*% pref_m
diag(cooccur) <- 0 # zero out diagonal 

net_cooccur <- graph_from_adjacency_matrix(cooccur, mode = "undirected", 
                                           diag = F, weighted = TRUE)

# Set vertex attribute of color by type of category
net_cooccur <- set_vertex_attr(net_cooccur, "type",
                               value = as.character(pretty_colnames$type_new_long))


#################################
# Calculate centrality measures #
#################################

# Degree
deg_cooccur <- igraph::degree(net_cooccur, loops = FALSE)

# Node strength (sum of the edge weights of the adjacent edges for each vertex)
strength_cooccur <- strength(net_cooccur, loops = FALSE)

# Set up for Opsahl's generalization of degree centrality
# for weighted networks 
tnet_obj <- as.tnet(cooccur, type =  "weighted one-mode tnet")

# Calculate degree centrality given different alpha values 
calc_deg_alpha <- function(alpha, tie_count, strength) {
  degree_cent <- (tie_count^(1 - alpha)) * (strength^alpha)
  return(degree_cent)
}

alphas <- seq(0, 1, by = 0.25)
alpha_lookup <- data.frame(alpha_seq = paste0("V", seq(1, 5)),
                           alpha = alphas)

deg_alpha <- as.data.frame(sapply(alphas, 
                                  FUN = calc_deg_alpha, deg_cooccur, 
                                  strength_cooccur)) %>%
  mutate(category = row.names(.)) %>%
  gather(key = "alpha_seq", "value", -category) %>%
  left_join(alpha_lookup, by = "alpha_seq") %>%
  select(-alpha_seq) %>%
  spread(alpha, value) %>%
  mutate(rank_0 = dense_rank(-`0`),
         rank_25 = dense_rank(-`0.25`),
         rank_50 = dense_rank(-`0.5`),
         rank_75 = dense_rank(-`0.75`),
         rank_100 = dense_rank(-`1`)) %>%
  arrange(rank_50) %>%
  mutate(category = gsub("_", " ", category)) %>%
  left_join(select(pref_descrip, -category), by = c("category" = "label")) %>%
  select(category, rank_0, `0`, rank_25, `0.25`, 
         rank_50, `0.5`, rank_75, `0.75`,
         rank_100, `1`, type_new_long, cfr_designation) 

#####################
# Main Text Outputs #
#####################

# Produce Figure 5: Map of Co-occurrences 
# Create network plot 
# with nodes scaled by degree centrality 
# Align column order and add degree centrality as a node attribute
pretty_colnames <- pretty_colnames %>%
  left_join(select(deg_alpha, category, `0.5`), 
            by = c("label" = "category")) %>%
  rename(degree_cent_50 = `0.5`)

net_cooccur <- set_vertex_attr(net_cooccur, "degreecent",
                               value = pretty_colnames$degree_cent_50)

# Plot network diagram
set.seed(48105) 
ggnet_init <- ggnet2(net_cooccur, mode = "kamadakawai",
                     edge.size = (E(net_cooccur)$weight * 0.032),
                     edge.alpha = .75, alpha = 0.9, size = "degreecent", 
                     max_size = 18, color = "type", palette = colors_ref, 
                     color.legend = "Category type") 
# Manually shift a few labels to prevent overlap
# Manually move the time on waitlist isolate closer to rest of plot
ggnet_init$data$y[ggnet_init$data$label == "Time on waitlist"] <- 0.41
ggnet_init$data$x[ggnet_init$data$label == "Time on waitlist"] <- 0.5
# Manually move unemployment insurance to left to prevent being cut off
ggnet_init$data$x[ggnet_init$data$label == "Unemployment insurance"] <- 0.975
ggnet_init$data$y[ggnet_init$data$label == "Displaced due to demolition/rehab"] <- 0.65
ggnet_init$data$y[ggnet_init$data$label == "Homelessness"] <- 0.62
ggnet_init$data$y[ggnet_init$data$label == "Organizational referral or affiliation"] <- 0.695
# Rename a couple nodes to fix overlap issues
names_copy <- names(V(net_cooccur))
names_copy[names(V(net_cooccur)) == "Displaced due to demolition/rehab"] <- "Displaced, demolition/rehab"
names_copy[names(V(net_cooccur)) == "Displaced due to disaster/fire"] <- "Displaced, disaster/fire"
names_copy[names(V(net_cooccur)) == "Displaced due to gov't action"] <- "Displaced, gov't action"

ggnet_init +
  guides(size = FALSE) + 
  shadowtext::geom_shadowtext(aes(label = names_copy),
                              color = "black", bg.color = "white", bg.r = 0.17,
                              size = 3.9) + 
  theme(legend.position="bottom", legend.box = "horizontal",
        legend.text=element_text(size = 15),
        legend.title=element_text(size = 15)) + 
  guides(color = guide_legend(override.aes = list(size = 15)))

ggsave(here("figs", "cooccur_network.png"), plot = last_plot(),
       device = "png", width = 13.5, height = 10,
       dpi = 350)


# Produce Figure 6: Most Contingent Categories  
# Summarize how often a given category shows up across preferences
pref_appearances <- pref %>%
  gather(key = "category") %>%
  group_by(category) %>%
  summarize(tot_count = sum(value)) %>%
  left_join(select(pref_descrip, category, label), 
            by = c("category"))

# Calculate what proportion of all appearances 
# of a given category are with a specific
# second category 
cooccur_proportion <- cooccur %>%
  as.data.frame() %>%
  mutate(category = row.names(.)) %>%
  gather("category_to", "value", -category) %>%
  filter(value > 0) %>% 
  rename(category_from = category) %>%
  left_join(pref_appearances, by = c("category_from" = "label")) %>%
  filter(category_from != category_to) %>%
  mutate(prop_total_from = value / tot_count) %>%
  arrange(desc(prop_total_from))

# For each category, identify the 
# category it is most often paired with
top_dependency <- cooccur_proportion %>%
  group_by(category_from) %>%
  arrange(desc(prop_total_from)) %>%
  filter(row_number()==1) %>%
  select(category_from, category_to, prop_total_from)

# Identify categories that rarely independently 
# constitute a preference
dependent_cat <- codes %>%
  ungroup() %>%
  # Only select rows for housing authorities with preferences
  filter(!is.na(unit_single)) %>%
  select(pha_code, unit_single:special_defs) %>%
  mutate(row_sum = rowSums(.[2:ncol(.)])) %>%
  # Filter to only rows where there was no co-occurrence (preference
  # based on only a single category)
  filter(row_sum == 1) %>%
  select(-row_sum, -pha_code) %>%
  gather(key = "category") %>%
  group_by(category) %>%
  # Count number of times category appeared alone
  summarize(independ_count = sum(value)) %>%
  left_join(pref_appearances, by = "category") %>%
  # Calculate proportion of appearances where the category
  # appears independently 
  mutate(prop_independ = independ_count / tot_count) %>%
  arrange(prop_independ) %>%
  mutate(prop_depend = 1 - prop_independ) %>%
  # Join on info about most common pairing
  # for each category
  left_join(top_dependency, by = c("label" = "category_from")) %>%
  # Join on preference label
  left_join(pref_descrip, c("label", "category"))

# Produce figure illustrating 25 most dependent categories 
dependent_cat_sub <- dependent_cat %>%
  ungroup() %>%
  filter(row_number() <= 25) %>%
  arrange(prop_depend) %>%
  mutate(fix_row_number = row_number())

colors_labels <- ifelse(colors_bars == "#000000", "#FFFFFF", "#000000")

p_depend <- dependent_cat_sub %>%
  ggplot(aes(x = reorder(label, fix_row_number), 
             y = prop_depend, fill = type_new_long, 
             # For setting color of interior label
             color = ifelse(type_new_long %in% 
                              c("Age and household structure", 
                                "Ties to community or formal institutions"),
                            "#000000", "#FFFFFF"))) +
                              geom_bar(stat = "identity", 
                                       width = 0.85,
                                       size = 0) +
  # Set color of interior label
  scale_color_manual(values = colors_labels) +
  # Set color of bars 
  scale_fill_manual(values = colors_bars) +
  guides(colour = FALSE) + 
  ylab("Proportion of times paired with another category") + xlab("Category") + 
  labs(fill = "Category type") + 
  coord_flip() +
  geom_text(aes(label = ifelse(cfr_designation == TRUE, "CFR", "")), 
            size = 2.2, hjust = 1.2, show.legend = FALSE) +
  theme_minimal() + 
  theme(legend.position = "bottom")

# Create a table grob with strongest dependency
tab <- dependent_cat_sub %>%
  arrange(desc(fix_row_number)) %>%
  select(category_to) %>%
  rename(`Paired most often with...` = category_to) %>%
  tableGrob(rows = NULL, theme=ttheme_default(base_size = 10, core.just = "left", 
                                              core = list(fg_params = list(hjust = 0, x = .05)),
                                              colhead = list(fg_params = list(hjust = 0, x = .05)),
                                              padding = unit(c(0, 2.455), "mm")))

tab$widths <- unit(rep(1 / ncol(tab), ncol(tab)), "npc")

# Get legend 
leg <- lemon::g_legend(p_depend)

g <- grid.arrange(arrangeGrob(arrangeGrob(arrangeGrob(nullGrob(), 
                                                      p_depend + guides(fill=FALSE), 
                                                      heights = c(1,12)),
                                          arrangeGrob(nullGrob(), 
                                                      tab, nullGrob(), 
                                                      heights = c(1.9,8,2.1)), 
                                          widths=c(12, 5)), 
                              leg, textGrob("CFR     Discussed in Code of Federal Regulations", 
                                            gp=gpar(fontsize = 9, x = 0)), 
                              heights = c(13, 2, 0.4)))

ggsave(here("figs", "dependent_cat.png"),
       plot = g, device = "png",
       width = 9, height = 7, dpi = 400)


#############################
# ONLINE SUPPLEMENT OUTPUTS #
#############################

# Produce Figure S2: Degree centrality in co-occurrence network with
#                    alpha = 0.5
ggplot(deg_alpha,
       aes(x = reorder(category, `0.5`), y = `0.5`, fill = type_new_long, 
           color = ifelse(type_new_long %in% c("Age and household structure", 
                                               "Ties to community or formal institutions"),
                          "#000000", "#FFFFFF"))) +
                            geom_bar(stat = "identity", 
                                     width = 0.85, size = 0)  +
  # Set color of interior label
  scale_color_manual(values = colors_labels) +
  # Set color of bars 
  scale_fill_manual(values = colors_bars) +
  guides(colour = FALSE) + 
  ylab("Degree centrality") + xlab("Category") + labs(fill = "Category type") + 
  coord_flip() +
  geom_text(aes(label = ifelse(cfr_designation == TRUE, "CFR", "")), 
            size=2.2, hjust = 1.2, show.legend  = FALSE) +
  annotate(geom = "text", color = "#000000", size= 3, 
           label = "CFR     Discussed in Code of Federal Regulations",
           x = 23.0, y = 162, hjust = 0) +
  theme_minimal() + 
  theme(legend.position = c(.75, .2)) 

ggsave(here("figs", "supp_deg_cent.png"), plot = last_plot(),
       device = "png", width = 10, height = 9,
       dpi = 300)

# Produce Table S7: Degree centrality in co-occurrence network 
#                   for different alpha values.
# Generate table comparing rankings for different alpha
print(xtable(deg_alpha[, -c(12:14)],
             digits = c(0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1)), 
      include.rownames  = FALSE) 

# Produce Table S8: Degree centrality in co-occurrence network 
#                   under different weighting. 
# Alternative specification weighting network by number of vouchers 
# With alpha = 0.5
pref_vms <- codes %>%
  ungroup() %>%
  # Only select rows for housing authorities with preferences
  filter(!is.na(unit_single)) %>%
  # Join on perc of vouchers administered
  left_join(select(pha_all, pha_code, hud_perc_vouchers), by = "pha_code") %>%
  # Weigh each edge by the percent of all vouchers the PHA administers 
  mutate_at(vars(unit_single:special_defs), list(~. * hud_perc_vouchers)) %>%
  select(unit_single:special_defs)  

pref_vms_m <- as.matrix(pref_vms)
colnames(pref_vms_m) <- pretty_colnames$label
cooccur_vms <- t(pref_vms_m) %*% pref_vms_m
diag(cooccur_vms) <- 0

net_cooccur_vms <- graph_from_adjacency_matrix(cooccur_vms, mode = "undirected", 
                                               diag = F, weighted = TRUE)

deg_cooccur_vms<- igraph::degree(net_cooccur_vms, loops = FALSE)
strength_cooccur_vms <- strength(net_cooccur_vms, loops = FALSE)

# Generate table comparing rankings for even PHA weight
# vs weighting by percent of vouchers administered
alt_weighting_comp <- data.frame(vms_weight_degcent = 
                                   calc_deg_alpha(0.5, deg_cooccur_vms, 
                                                  strength_cooccur_vms)) %>%
  mutate(category = row.names(.)) %>%
  left_join(deg_alpha, by = "category") %>%
  transmute(category,
            even_weight_rank = rank_50, even_weight_degcent = `0.5`,
            vms_weight_rank = dense_rank(-vms_weight_degcent), vms_weight_degcent,
            diff_rank = even_weight_rank - vms_weight_rank) %>%
  arrange(even_weight_rank)

print(xtable(alt_weighting_comp,
             digits = c(0, 1, 0, 1, 0, 1, 0)), include.rownames  = FALSE) 
 
